In [1]:
import pandas as pd;
import numpy as np;
import plotly.express as plx
from plotly.subplots import make_subplots
import plotly.graph_objects as go

artists=pd.read_csv("data/artists.csv")
tracks = pd.read_csv("data/tracks.csv")
tracks["release_year"] = tracks["release_date"].apply(lambda e: e.split("-")[0])

FINILETTI SIMONE, CADONI MATTEO, gruppo Q, Spotify Dataset¶

In [2]:
def playSong(id):
    print(f"https://open.spotify.com/embed/track/{id}?utm_source=generator")

Tracks Dataset¶

Most popular song

In [3]:
tracks.loc[tracks['popularity'] == 100,['name','artists']]
Out[3]:
name artists
93802 Peaches (feat. Daniel Caesar & Giveon) ['Justin Bieber', 'Daniel Caesar', 'Giveon']

Is popularity related to release date?¶

In [4]:
df=tracks.groupby("release_year",as_index=False)["popularity"].mean()
df = df.loc[(df['release_year'] != "1900") & (df['release_year']!= "2021")] #remove incorrect data
fig = plx.histogram(df, x='release_year', y='popularity', histfunc="avg")
fig.layout["yaxis"]["title"]="Avg Popularity"
fig.layout["xaxis"]["title"]="Release Year"
fig.show()

Is danceability related to tempo?¶

In [5]:
df = tracks.sort_values(by=['tempo']).loc[tracks['danceability'] != 0]
df['tempo'] = df['tempo'].round()
df = df.loc[(df['tempo'] > 60) & (df['tempo'] < 220)]
df["tempoType"]="(40-50)"
for i in range (50, 220, 10):
    df.loc[(df["tempo"]>=i),"tempoType"]=f"({i}-{i+10})"
df.sort_values(by="tempoType", ascending=True)
fig = plx.histogram(df, x='tempoType', y='danceability', histfunc="avg")
fig.layout["yaxis"]["title"]="Avg Danceability"
fig.layout["xaxis"]["title"]="Tempo Range"
fig.show()
In [6]:
print("Most Popular Song")
playSong(df.loc[df['popularity'] == 100].id.tolist()[0])
Most Popular Song
https://open.spotify.com/embed/track/4iJyoBOLtHqaGxP12qzhQI?utm_source=generator
In [7]:
print("Valence value 0 Song")
playSong(df.loc[df['valence'] == 0].sample(1).id.tolist()[0])
Valence value 0 Song
https://open.spotify.com/embed/track/3JHex6uBuFlHovOSEFQVsg?utm_source=generator
In [8]:
print("A song with tempo > 210")
playSong(df.loc[df['tempo'] > 210].sample(1).id.tolist()[0])
A song with tempo > 210
https://open.spotify.com/embed/track/2EAQidPRLsjAUB4AXHpaiT?utm_source=generator
In [9]:
print("Most Loud")
playSong(df.loc[df['loudness'] == df['loudness'].min()].id.tolist()[0])
Most Loud
https://open.spotify.com/embed/track/5N5AuKydDEm9XwHloVw7L7?utm_source=generator